# Title: 
# - False Discovery Rate (FDR) Estimation for Gridded Data

# Objective:
# - Adjusting p-values Using FDR-LBE Estimation procedure

# Authors:
# - Oliver Gutiérrez-Hernández: http://orcid.org/0000-0003-2580-5465. E-mail: olivergh@uma.es
# - Luis V. García: http://orcid.org/0000-0002-5514-2941. E-mail: lv.garcia@csic.es

# Source:
# - Gutiérrez-Hernández, O., & García, L.V. (2025). 
#   False Discovery Rate Estimation and Control in Remote Sensing: 
#   Reliable Statistical Significance in Spatially Dependent Gridded Data. 
#   Remote Sensing Letters, 16(5), 537–554. https://doi.org/10.1080/2150704X.2025.2478664

# References for FDR-LBE Estimation procedure:
# - Dalmasso, Cyril., Broët, Philippe., & Moreau, Thierry. 2005. “A Simple Procedure for Estimating the False Discovery Rate.”  
#   Bioinformatics 21 (5): 660–668. https://doi.org/10.1093/bioinformatics/bti063
# - Dalmasso, Cyril. 2024. LBE: Estimation of the False Discovery Rate. R Package Version 1.74.0. 
#   Bioconductor. https://bioconductor.org/packages/release/bioc/html/LBE.html

# Practical Applications:
# - Any scenario requiring rigorous multiple hypothesis testing on spatial raster data based on p-values to adjust and reduce false discoveries.
# - Particularly suitable for large datasets, where estimating the proportion of true null hypotheses (\( \pi_0 \)) enhances the reliability of p-value adjustments:
#     - When \( \pi_0 = 1 \): The LBE method converges with results from the standard FDR-BH procedure, as it assumes all hypotheses are null.
#     - When \( \pi_0 < 1 \): The LBE method identifies a proportion of true alternative hypotheses (\( 1 - \pi_0 \)), allowing it to detect more significant positives than FDR-BH. 
# - Note: It is recommended to compare results obtained from LBE with those of FDR-BH to validate findings and ensure consistency, especially in datasets with complex dependencies or when \( \pi_0 \) is significantly below 1.
# - While LBE can handle datasets with positive dependencies effectively.

# Input:
# - Raster file: Image in TIFF format containing p-values for analysis.
# - Provide the full path to a raster file containing p-values in TIFF format (.tif). Example: "C:/data/p-values.tif".
# - Ensure the raster file is properly formatted:
#   - p-values must range between 0 and 1.
#   - Invalid or missing values should be clearly identified (e.g., -9999).

# Outputs:
# 1. Summary Statistics:
#    - Total number of valid pixels (non-NA).
#    - Number and percentage of significant pixels (raw p-values and adjusted q-values).
#    - Proportion of true null hypotheses (\( \pi_0 \)).
#    - FDR estimation using LBE.
#
# 2. Graphical Outputs:
#    - LBE plot showing estimated parameters.
#    - Visualisation of raw p-values, adjusted q-values, and significance maps.
#
# 3. File Outputs (maps):
#    - Continuous raster of adjusted q-values: <base_name>_adjusted_FDR-LBE.tif
#    - Binary raster of significant pixels (raw p-values): <base_name>_binary_original.tif
#    - Binary raster of significant pixels (adjusted q-values): <base_name>_binary_FDR-LBE.tif


# --- Prerequisites --- # !!! ATTENTION: ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲

# Install necessary packages before running the script, including dependencies, only if not already installed.

# Function to check and install a package if not already installed
install_if_missing <- function(package) {
  if (!requireNamespace(package, quietly = TRUE)) {
    install.packages(package, dependencies = TRUE)
  } else {
    cat(paste("Package", package, "is already installed.\n"))
  }
}

# Install 'terra' for handling raster data
install_if_missing("terra")

# Install 'LBE' for estimating FDR
install_if_missing("BiocManager")
if (!requireNamespace("LBE", quietly = TRUE)) {
  BiocManager::install("LBE", dependencies = TRUE)
} else {
  cat("Package LBE is already installed.\n")
}

# --- Load necessary libraries ---
library(terra)   # For handling raster data; LBE (from LBE package) is applied to raster values extracted via terra
library(LBE)     # For estimating FDR

# --- Set paths and load data ---
raster_path <- "C:/data/p-values.tif"  # !!! ATTENTION: REPLACE WITH YOUR FILE PATH ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲
raster_pvalues <- rast(raster_path)    # Load the raster data using terra
raster_base_name <- tools::file_path_sans_ext(basename(raster_path))  # Extract the base name for file outputs

# --- Information about the raster file ---
cat("\n--- Raster Information ---\n")  
cat("Total number of pixels:", ncell(raster_pvalues), "\n")  # Display the total number of pixels in the raster

# Count valid and invalid pixels
p_values <- values(raster_pvalues)  # Extract the p-values (from the raster)
valid_indices <- !is.na(p_values)  # Identify valid pixels (not NA)
num_valid_pixels <- sum(valid_indices)  # Count valid pixels
num_invalid_pixels <- sum(is.na(p_values))  # Count invalid pixels (NA)
cat("Number of total pixels:", ncell(raster_pvalues), "\n")  # Display the total number of pixels
cat("Number of invalid pixels (NA):", num_invalid_pixels, "\n")  # Display the number of invalid pixels
cat("Number of valid pixels (containing p-values derived from hypothesis testing):", num_valid_pixels, "\n")  # Valid pixels

# --- Process p-values ---
p_values <- values(raster_pvalues)      # Extract p-values from the raster
valid_indices <- !is.na(p_values)       # Identify valid indices where p-values are not NA
adjusted_p_values <- rep(NA, length(p_values))    # Initialise an empty vector to store adjusted p-values
adjusted_p_values[valid_indices] <- p.adjust(p_values[valid_indices], method = "BH")  # Apply the Benjamini-Hochberg procedure to adjust p-values for FDR control

# --- Apply LBE method to estimate FDR ---
LBE_result <- LBE(pval = p_values[valid_indices], plot.type = "none")  # Apply LBE to non-NA values
adjusted_p_values <- rep(NA, length(p_values))     # Initialise adjusted q-values
adjusted_p_values[valid_indices] <- LBE_result$qvalues  # Fill adjusted values
pi0 <- LBE_result$pi0                              # Proportion of true null hypotheses

# --- Generate binary maps based on p-value thresholds ---
significant_original <- p_values < 0.05  # Binary map for raw p-values: p-values <= 0.05
significant_adjusted <- adjusted_p_values < 0.05  # Binary map for FDR-adjusted p-values: adjusted p-values <= 0.05

# Convert to rasters for visualization
binary_raster_original <- setValues(raster_pvalues, significant_original)  # Binary raster for original p-values
binary_raster_adjusted <- setValues(raster_pvalues, significant_adjusted)  # Binary raster for adjusted p-values

# --- Calculate percentages ---
num_significant_pixels <- sum(significant_adjusted, na.rm = TRUE)  # Number of significant pixels after adjustment
percentage_significant <- (num_significant_pixels / num_valid_pixels) * 100  # Percentage of significant pixels
percentage_non_significant <- 100 - percentage_significant  # Percentage of non-significant pixels

# --- Summary Statistics ---

cat("\n--- Summary Statistics ---\n")
cat("Total valid pixels:", num_valid_pixels, "(100%)\n")  # Total valid pixels
cat("Significant pixels (raw p-values < 0.05):", sum(significant_original, na.rm = TRUE), "\n")  # Significant pixels (raw p-values)
cat("Significant pixels (adjusted q-values < 0.05):", num_significant_pixels, "(", round(percentage_significant, 2), "%)\n")  # Significant pixels (adjusted q-values)
cat("Proportion of true null hypotheses (\u03c00):", pi0, "\n")  # Proportion of true null hypotheses
cat("Estimated FDR (LBE):", sprintf("%.8f", LBE_result$FDR), "\n")  # Estimated FDR (from LBE)
LBEsummary(LBE_result) # Displays the estimated proportion of true null hypotheses, the confidence interval for this proportion, and compares the number of significant calls for p-values and estimated q-values using specified cutoffs.

# --- Graphical Outputs ---

# Plot the histogram for the transformed p-values
hist(-log10(p_values), breaks = 50, 
     main = "Histogram of Transformed p-values", 
     xlab = "-log10(p-values)", 
     col = "skyblue", 
     border = "white")

# Add a vertical line for the estimated value of pi0
abline(v = -log10(LBE_result$pi0), col = "red", lwd = 2, lty = 2)

# Update the legend to include the tuning parameter (a)
legend("topright", legend = bquote(hat(pi)[0] ~ "(" ~ italic(a) ~ ")" ~ "=" ~ .(round(LBE_result$pi0, 4))), 
       col = "red", lty = 2, lwd = 2)

# LBE plot: Visualise LBE results
LBEplot(LBE_result, plot.type = "multiple")
LBE_result_graphical <- LBE(pval = p_values[valid_indices], plot.type = "main")

# Visualise raw p-values, binary maps, and adjusted q-values
par(mfrow = c(2, 2))  # 2 rows and 2 columns layout for map visualisation
plot(raster_pvalues, main = expression("Raw "*italic(p)*"-values"), col = hcl.colors(100, "Blues"))
plot(binary_raster_original, main = "Significant trends (original)", col = c("white", "orange"), legend = FALSE)
plot(setValues(raster_pvalues, adjusted_p_values), main = expression("Adjusted "*italic(p)*"-values (FDR-LBE)"), col = hcl.colors(100, "Blues"))
plot(binary_raster_adjusted, main = "Significant trends (FDR-LBE)", col = c("white", "orange"), legend = FALSE)

# --- File Outputs (maps) ---
raster_base_name <- tools::file_path_sans_ext(basename(raster_path))  # Extract base name for output file names
writeRaster(setValues(raster_pvalues, adjusted_p_values), paste0(raster_base_name, "_adjusted_FDR-LBE.tif"), overwrite = TRUE)  # Save adjusted p-values as raster
writeRaster(binary_raster_original, paste0(raster_base_name, "_binary_original.tif"), overwrite = TRUE)  # Save binary map for original p-values
writeRaster(binary_raster_adjusted, paste0(raster_base_name, "_binary_FDR-LBE.tif"), overwrite = TRUE)  # Save binary map for FDR-adjusted p-values

# --- Display file outputs ---
cat("\n--- File Outputs ---\n")
cat("Adjusted q-values raster: <base_name>_adjusted_FDR-LBE.tif\n")
cat("Binary raster (raw p-values): <base_name>_binary_original.tif\n")
cat("Binary raster (adjusted q-values): <base_name>_binary_FDR-LBE.tif\n")
